### Step 1: Data Collection
# Read Lake Cycle excel sheet
library(readxl)
lake_xls_f <- '/Users/andy/Dropbox/TSCreator/TSCreator development/Developers/Andy/Projects/ML-Data Mining/SpectralAnalysis/LakeCycleHomework_AnalysisExcel.XLS'
excel_sheets(lake_xls_f)
## [1] "Plot of Input" "Input Data" "Data Worksheet"
## [4] "Spectral Analysis" "Sheet4"
lake_xls <- read_excel(lake_xls_f, sheet='Input Data')
# Complete Interval I of Newark Basin
# Interval I = 2768-3370 m; 706 pts
nb_d <- data.frame(lake_xls[8:713,c(16,17,18)])
colnames(nb_d) <- c('Point', 'Depth (m)', 'Depth Rank')
# Depth Vec
dv <- as.numeric(nb_d$`Depth (m)`)
#diff(dv)
max_d <- max(dv)
min_d <- min(dv)
# window of depth
window <- max(dv)-min(dv)
dr <- as.numeric(nb_d$`Depth Rank`)
max_dr <- max(dr)
min_dr <- min(dr)
plot(nb_d$`Depth (m)`, nb_d$`Depth Rank`, xlab='Depth(meters)',
ylab='Depth Rank', type='l', col='darkblue', lwd=2,
main='Input Data for Spectral Analysis'
)
#________________________________________________________________________________#
nb_d.avg <- mean(dr)
nb_d.dm <- as.numeric(nb_d$`Depth Rank`) - nb_d.avg
# plot
plot(nb_d$`Depth (m)`, nb_d$`Depth Rank`, xlab='Depth(meters)',
ylab='', type='l', col='darkblue', lwd=2,
main='Demeaned Input Data(Red Curve)', ylim=c(-4,4)
)
abline(h=nb_d.avg, lty=2, col='darkblue')
lines(nb_d$`Depth (m)`, nb_d.dm, col='red')
abline(h=0, lty=2, col='red')
# Filtering - White noise removed
# (3pt moving "Hanning" average)
# 0.5*n + 0.25*(n-1 + n+1)
hanning_ma <- function(df, n) {
x <- 0.5 * df[n] + 0.25*(df[n-1] + df[n+1])
return(x)
}
nb_d.filt <- c()
i <- 1
nr <- length(nb_d.dm)
for (n in 2:(nr-1)) {
h <- hanning_ma(nb_d.dm, n)
nb_d.filt <- c(nb_d.filt, h)
}
# plot
plot(nb_d$`Depth (m)`, nb_d.dm, xlab='Depth(meters)',
ylab='Value', type='l', col='red', lwd=1,
main='Demeaned + Filtered Input Data', ylim=c(-4,4)
)
abline(h=0, lty=2, col='red')
lines((nb_d$`Depth (m)`)[2:(nr-1)], nb_d.filt, col='green')
nb_d.tap <- nb_d.filt
i <- 1
nr <- length(nb_d$`Depth (m)`)
n <- length(nb_d.filt)
tap_perc_n <- n * 10/100 # 10% tapering to reduce the effect of the terminations of data at the ends of the window
for (j in 1:tap_perc_n) {
w = j * pi / tap_perc_n
nb_d.tap[j] = nb_d.filt[j] * 0.5 * (1-cos(w))
}
for (j in (n-tap_perc_n):n) {
w = (512-j) * pi / tap_perc_n
nb_d.tap[j] = nb_d.filt[j] * 0.5 * (1-cos(w))
}
# plot
plot(nb_d$`Depth (m)`, nb_d.dm, xlab='Depth(meters)',
ylab='', type='l', col='red', lwd=1,
main='Demeaned + Filtered + Split cosine tapered(10%) Input Data', ylim=c(-8,8)
)
abline(h=0, lty=2, col='red')
lines((nb_d$`Depth (m)`)[2:(nr-1)], nb_d.filt, col='green')
lines((nb_d$`Depth (m)`)[2:(nr-1)], nb_d.tap, col='black')
# plot(nb_d$`Depth (m)`[], nb_d.tap, xlab='Depth(meters)',
# ylab='', type='l', col='red', lwd=1,
# main='Demeaned + Filtered Input Data', ylim=c(-8,8)
# )
#________________________________________________________________________________#
plot.frequency.spectrum <- function(X.k,
xlimits=c(0,length(X.k)/2),
xlab='Frequency',
ylab='Amplitude',
density=FALSE,
plot.type='l'
) {
x <- (0:(length(X.k)-1))
plot.data <- cbind(x, Mod(X.k))
# TODO: why this scaling is necessary?
plot.data[2:length(X.k),2] <- 2*plot.data[2:length(X.k),2]
N <- length(x)
freq <- x
amp <- plot.data[,2]
if(density) {
tot <- sum(amp)
amp <- amp/tot
print(amp)
ylim <- c(0, max(amp))
}
else {
tot <- 1
ylim=c(0,max(Mod(plot.data[,2])))
}
plot(freq, amp, lwd=2, main="", type=plot.type,
#xaxp=c(0, N, N/6),
xlim=xlimits, # show only half of the frequency
ylim=ylim,
xlab=xlab, ylab="Amplitude")
period <- (max_dr - min_dr) /freq
plot(period, amp, lwd=2, main="", type=plot.type,
#xaxp=c(0, N, N/6),
#xlim=xlimits, # show only half of the frequency
#ylim=ylim,
xlab=xlab,
ylab="Amplitude")
}
nb_d.fft <- fft(nb_d.tap)
#par(mfrow=c(2,1))
xlab = sprintf('Frequency (cycle per %f meters)', max_d - min_d)
plot.frequency.spectrum(nb_d.fft,
xlim=c(0,200),
xlab=xlab,
ylab='Amplitude'#,
#,plot.type = 'h'
)
plot.frequency.spectrum(nb_d.fft,
xlim=c(0,200),
xlab=xlab,
ylab='Amplitude'#,
#,plot.type = 'h'
)
n <- length(nb_d.fft)
R.f <- Mod(nb_d.fft)
I.f <- n * (R.f^2)
var.I.f <- var(I.f)
N <- length(R.f)
freq <- 1:N
df <- 1/sqrt(12)
lines(freq, R.f/qchisq(0.975, df), t='l', lty=2, col='green')
par(new=T)
plot(freq, R.f/qchisq(0.025, df), t='l', lty=2, col='red', xlim=c(0, 200)
, xaxt=NULL, yaxt=NULL, axes=FALSE, xlab="", ylab="")
#
# Calculate Amplitude
Amp <- R.f <- Mod(nb_d.fft)
# Calculate Power Spectrum
PowerSpec <- Amp^2
# Take half of it , coz mirrored
PowerSpec <- PowerSpec[1:(length(Amp)/2)]
PowerSpec.han <- c()
i <- 1
nr <- length(PowerSpec)
for (n in 2:(nr-1)) {
h <- hanning_ma(PowerSpec, n)
PowerSpec.han <- c(PowerSpec.han, h)
}
dp <- as.numeric(nb_d$`Depth (m)`)
window_width <- max(dp) - min(dp)
wavenum <- 1:(length(PowerSpec.han))
wavelen <- window_width/wavenum
TotPowerSpec <- sum(PowerSpec)
RelPowerSpec <- PowerSpec.han/TotPowerSpec
# Charts of Wavenumber vs. Smoothed Power
plot(wavenum, PowerSpec.han, t='h',
xlab=paste('Wavenumber (cycle/window) window=', window_width, "m"),
ylab='Power Spectra [Amplitude]^2'
)
plot(wavenum, PowerSpec.han, t='l',
xlab=paste('Wavenumber (cycle/window) window=', window_width, "m"),
ylab='Power Spectra [Amplitude]^2'
)
plot(wavenum, PowerSpec.han, t='h',
xlab=paste('Wavenumber (cycle/window) window=', window_width, "m"),
ylab='Power Spectra [Amplitude]^2',
xlim=c(0,50))
plot(wavenum, PowerSpec.han, t='l',
xlab=paste('Wavenumber (cycle/window) window=', window_width, "m"),
ylab='Power Spectra [Amplitude]^2',
xlim=c(0,50))
# Charts of Wavenumber vs. Relative Power
plot(wavenum, RelPowerSpec, t='h',
xlab=paste('Wavenumber (cycle/window) window=', window_width, "m"),
ylab='Relative Power Spectra [Amplitude]^2')
# Charts of Wavenumber vs. Relative Power
plot(wavenum, RelPowerSpec, t='l',
xlab=paste('Wavenumber (cycle/window) window=', window_width, "m"),
ylab='Relative Power Spectra [Amplitude]^2')
plot(wavenum, RelPowerSpec, t='h',
xlab=paste('Wavenumber (cycle/window) window=', window_width, "m"),
ylab='Relative Power Spectra [Amplitude]^2',
xlim=c(0,120))
# Charts of Wavenumber vs. Relative Power
plot(wavenum, RelPowerSpec, t='l',
xlab=paste('Wavenumber (cycle/window) window=', window_width, "m"),
ylab='Relative Power Spectra [Amplitude]^2',
xlim=c(0,120))
# Charts of Wavenumber vs. Relative Power
plot(wavenum, RelPowerSpec, t='h',
xlab=paste('Wavenumber (cycle/window) window=', window_width, "m"),
ylab='Relative Power Spectra [Amplitude]^2',
xlim=c(0,50))
# Charts of Wavenumber vs. Relative Power
plot(wavenum, RelPowerSpec, t='l',
xlab=paste('Wavenumber (cycle/window) window=', window_width, "m"),
ylab='Relative Power Spectra [Amplitude]^2',
xlim=c(0,50))
# )Wavelength vs. Smoothed Power Spectra
plot(wavelen, PowerSpec.han, t='h',
xlab='Wavelength (m)',
ylab='Power Spectra [Amplitude]^2')
plot(wavelen, PowerSpec.han, t='l',
xlab='Wavelength (m)',
ylab='Power Spectra [Amplitude]^2')
plot(wavelen, PowerSpec.han, t='h',
xlab='Wavelength (m)',
ylab='Power Spectra [Amplitude]^2',
xlim = c(0,40))
plot(wavelen, PowerSpec.han, t='h',
xlab='Wavelength (m)',
ylab='Power Spectra [Amplitude]^2',
xlim = c(0,10), ylim=c(0,2000))
plot(wavelen, PowerSpec.han, t='l',
xlab='Wavelength (m)',
ylab='Power Spectra [Amplitude]^2',
xlim = c(0,10), ylim=c(0,2000))
plot(wavelen, PowerSpec.han, t='l',
xlab='Wavelength (m)',
ylab='Power Spectra [Amplitude]^2',
xlim = c(0,100), ylim=c(0,10000))
# )Wavelength vs. Relative Power Spectra
plot(wavelen, RelPowerSpec, t='h',
xlab='Wavelength (m)',
ylab='Relative Power Spectra [Amplitude]^2',
xlim=c(0,10))
abline(v=15, lty=2)
d <- data.frame(WaveLength=wavelen, RelativePowerSpectra=RelPowerSpec)
ord <- order(d['RelativePowerSpectra'], decreasing=TRUE)
RelPowerSpec.sorted <- d[ord,]
RelPowerSpec.sorted$`WaveNumber(WindowWidth=601.68m)` <- wavenum[ord]
summary(nb_d)
## Point Depth (m) Depth Rank
## Length:706 Length:706 Length:706
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
head(nb_d)
## Point Depth (m) Depth Rank
## 1 1 2768.8 0
## 2 2 2769.66 0.0179745
## 3 3 2770.51 0.0469082
## 4 4 2771.36 0
## 5 5 2772.22 0.348179
## 6 6 2773.07 0.0909985
tail(nb_d)
## Point Depth (m) Depth Rank
## 701 701 3366.21 0.441702
## 702 702 3367.06 0.172589
## 703 703 3367.92 0.002375
## 704 704 3368.77 0
## 705 705 3369.62 0
## 706 706 3370.48 0.270028
head(RelPowerSpec.sorted, 30)
## WaveLength RelativePowerSpectra WaveNumber(WindowWidth=601.68m)
## 8 75.210000 0.070802595 8
## 35 17.190857 0.051515077 35
## 26 23.141538 0.047056180 26
## 34 17.696471 0.044246146 34
## 9 66.853333 0.042747939 9
## 27 22.284444 0.042074969 27
## 36 16.713333 0.036760480 36
## 7 85.954286 0.035352798 7
## 25 24.067200 0.027436241 25
## 2 300.840000 0.022828097 2
## 33 18.232727 0.022509834 33
## 37 16.261622 0.022111619 37
## 28 21.488571 0.020977379 28
## 1 601.680000 0.018928085 1
## 3 200.560000 0.015032953 3
## 38 15.833684 0.014104991 38
## 4 150.420000 0.011164447 4
## 32 18.802500 0.010246809 32
## 110 5.469818 0.010094840 110
## 131 4.592977 0.008704774 131
## 39 15.427692 0.008236617 39
## 10 60.168000 0.007889947 10
## 130 4.628308 0.007574582 130
## 29 20.747586 0.007294461 29
## 24 25.070000 0.007263327 24
## 125 4.813440 0.006645236 125
## 5 120.336000 0.006486515 5
## 111 5.420541 0.006224745 111
## 40 15.042000 0.006005567 40
## 109 5.520000 0.005961621 109
#________________________________________________________________________________#
###
# Method #1: Assuming a peak is a Milankovitch Cycle; computing Sedimentation Rate
#Eccentricity 412, 123, and 95 k.y.
#Obliquity 41 k.y.
#Precession 23, and 19 k.y.
#Long-period Peak a = 300.84m (2)
#Medium-period Peak b = 75.21 m (8)
#Medium-period Peak c = 23.14m (26)
#Short-period Peak d = 17.19m (35)
#Average of adjacent short -period Peaks e & f = 5.4 m (110, 111)
#Short -period Peak g = 4.6m (130, 131)
hm <- c(8, 35, 26, 2, 110, 131)
mil_cycles <- c(412, 123, 95, 41, 23, 19)
eccentricity <- 123
sed_rate <- (RelPowerSpec.sorted$WaveLength[1]/3) / (mil_cycles/1000)
sed_rate
## [1] 60.84951 203.82114 263.89474 611.46341 1090.00000 1319.47368
# Unit: m/Ma
# a more accurate sedimentation rate can be computed.
plot.harmonic <- function(X.k, i, ts, acq.freq, color="red", xlab='time', add=TRUE) {
X.k.h <- rep(0,length(X.k))
X.k.h[i+1] <- X.k[i+1] # i-th harmonic
harmonic.trajectory <- get.trajectory(X.k.h, ts, acq.freq=acq.freq)
if (add == TRUE)
points(ts, harmonic.trajectory, type="l", col=color)
else
plot(ts, harmonic.trajectory, type="l", col=color, xlab=xlab)
return(harmonic.trajectory)
}
# returns the x.n time series for a given time sequence (ts) and
# a vector with the amount of frequencies k in the signal (X.k)
get.trajectory <- function(X.k,ts,acq.freq) {
N <- length(ts)
i <- complex(real = 0, imaginary = 1)
x.n <- rep(0,N) # create vector to keep the trajectory
f <- 0:(length(X.k)-1)
# Equation x_n = \frac{1}{N} sum_{0}^{N-1} (X_k * exp(i * 2 * pi * \frac{k * n}{ N }))
# X.k : amount of frequency k in the signal, each k-th value is a complex number including strength(amplitude) and phase shift
# N : number of samples
# n : current sample
# k : current frequency, between 0 Hz to N-1 Hz
# 1/N : not necessary but it gives the actual sizes of the time spikes
# n/N : percent of time we have gone through
# 2pik: the speed in radians/second
for(n in 0:(N-1)) { # compute each time point x_n based on freqs X.k
t <- n / N
x.n[n+1] <- sum(X.k * exp(i*2*pi*f*t)) / N
}
x.n * acq.freq
}
plot.show <- function(trajectory,
start_time = 0,
time=1,
harmonics=-1,
plot.freq=FALSE,
plot.type='l',
scale=1.5,
xlab='Time',
ylab='') {
acq.freq <- length(trajectory)/time # data acquisition frequency (Hz)
ts <- seq(start_time,(start_time + time)-1/acq.freq,1/acq.freq) # vector of sampling time-points (s)
X.k <- fft(trajectory)
x.n <- get.trajectory(X.k,ts, acq.freq=acq.freq) / acq.freq
if (plot.freq)
plot.frequency.spectrum(X.k, plot.type=plot.type)
max.y <- ceiling(scale*max(Mod(x.n)))
if (harmonics[1]==-1) {
min.y <- floor(min(Mod(x.n)))-1
} else {
min.y <- ceiling(-scale*max(Mod(x.n)))
}
plot(ts,x.n, type="l", lwd=3, ylim=c(min.y,max.y),
xlab=xlab, ylab=ylab)
#abline(h=min.y:max.y,v=0:time,lty=3, lwd=0.25)
#points(ts,trajectory,cex=0.5, col="red") # the data points we know
y <- rep(0, length(X.k))
if (harmonics[1]>-1) {
for(i in 0:length(harmonics)) {
r <- plot.harmonic(X.k, harmonics[i], ts, acq.freq, color=i+1)
y <- y + r
}
}
points(ts, y/scale, t='l', col='orange', lwd=2)
}
amp <- Mod(nb_d.fft)
N <- length(amp)
amp.h <- amp[0:(N/2)]
x <- 0:(N/2-1)
d <- data.frame(x, amp.h)
d.sorted <- d[order(amp.h, decreasing = TRUE),]
head(d.sorted, 20)
## x amp.h
## 9 8 135.57578
## 36 35 90.63089
## 27 26 88.92840
## 35 34 85.61107
## 28 27 82.44720
## 37 36 70.09743
## 3 2 68.08022
## 26 25 59.79256
## 2 1 54.02897
## 5 4 51.95751
## 111 110 50.74177
## 39 38 50.56573
## 38 37 49.17176
## 10 9 46.40244
## 29 28 43.74434
## 132 131 42.32149
## 34 33 41.86663
## 33 32 41.14620
## 126 125 39.84090
## 41 40 37.28518
hm <- c(8, 35, 26, 2, 110, 131)
plot.show(nb_d.tap, start_time = min_d, time = max_d - min_d, harmonics = hm,
xlab='Depth (meter)', ylab='', scale=0.5)
## Warning in xy.coords(x, y, xlabel, ylabel, log): imaginary parts discarded
## in coercion
## Warning in xy.coords(x, y): imaginary parts discarded in coercion
## Warning in xy.coords(x, y): imaginary parts discarded in coercion
## Warning in xy.coords(x, y): imaginary parts discarded in coercion
## Warning in xy.coords(x, y): imaginary parts discarded in coercion
## Warning in xy.coords(x, y): imaginary parts discarded in coercion
## Warning in xy.coords(x, y): imaginary parts discarded in coercion
## Warning in xy.coords(x, y): imaginary parts discarded in coercion
hm_l <- paste(hm, ' cycles')
hm_l <- c(hm_l, 'combined cycle')
legend('topleft', legend=hm_l, col = c(1, 2, 3, 4, 5, 6,'darkblue'),
lty = rep(1,7), cex=0.40)
library(multitaper)
Mspec <- spec.mtm(nb_d.tap, deltat = 0.85, jackknife = TRUE,
k=7, nw=4,
log='no', lwd=2,
xlim=c(0,0.25),
plot=FALSE)
x <- Mspec$freq
y <- Mspec$spec
plot(x, y, type='l', lwd=2
,xlim=c(0,0.2)
#,log='y'
)
The frequencies are plotted in a scale between 0 to 0.5 (In our newark data 0 ~ 0 and 0.5 ~ ). The frequency resolution is low now. But we can still see the peaks around the same frequency.
Mspec <- spec.mtm(nb_d.tap, deltat = 0.85, jackknife = TRUE,
k=7, nw=4,
log='no', lwd=2,
xlim=c(0,0.25)
)
plot(x, y, type='l', lwd=2
,xlim=c(0,0.2)
,log='y', main='Amplitudes ploted on logarithmic scale',
xlab=seq(0,200)
)
Xspec <- spec.pgram(nb_d.tap, log='dB')
ylimits <- c(1/10*min(Xspec$spec), 25*max(Xspec$spec))
plot(Xspec$freq, Xspec$spec, xlim=c(0,0.2), type='l', lwd=2
#, ylim=ylimits
)
df <- Xspec$df
lines(Xspec$freq, Xspec$spec/qchisq(0.975, df), t='l', lty=2, col='green')
lines(Xspec$freq, Xspec$spec/qchisq(0.025, df), t='l', lty=2, col='red')
bw <- Xspec$bandwidth
df <- Xspec$df
chi_lo <- df / qchisq(0.025, df)
chi_up <- df / qchisq(0.975, df)
#lines(Xspec$freq, Xspec$spec / chi_lo, col='green')
#par(new=T)
#plot(Xspec$freq, Xspec$spec * chi_up, xlim=c(0,0.2), col='red', type='l')
#plot(Xspec, xlim=c(0,0.2))
#plot(Xspec$freq, Xspec$spec, xlim=c(0,0.2), type='l', lwd=2)
#a <- qchisq(Xspec$spec * chi_up, df =Xspec$df)
#lines(Xspec$freq, pchisq(Xspec$spec, df=Xspec$df), col='red', xlim=c(0,0.2))
library(psd)
## Loaded psd (1.0.1) -- Adaptive multitaper spectrum estimation
d <- ts(nb_d.tap, frequency=1)
Pspec <- psdcore(d, ntapaer=2)
plot(Pspec$freq, Pspec$spec, type='l', xlim=c(0,0.2))
spp <- spectral_properties(Pspec[["taper"]], db.ci = TRUE)
psppu <- with(Pspec, create_poly(freq, dB(spec), spp$stderr.chi.upper))
#par(new=T)
plot(psppu$x.x, psppu$y.y, lty=2, type='l',
xlim=c(0, 0.2), col=c('green','red'),
axes=FALSE, xlab='', ylab='')
library(lomb)
data(ibex)
lsp(ibex[2:3],)
lsp(ibex$temp,times=ibex$hours,type='period',ofac=5)
# lynx contains evenly sampled data
lsp(lynx)
lynx.spec <- lsp(lynx,type='period',from=2,to=20,ofac=5)
summary(lynx.spec)
## Value
## Time Time
## Data lynx
## n 114
## Type period
## Oversampling 5
## From 2.0035
## To 19.483
## # frequencies 254
## PNmax 31.579
## At period 9.5763
## At frequency 0.10442
## P-value (PNmax) 1.9603e-12
lsp(nb_d.dm)
lsp(nb_d.tap)
lsp(nb_d.tap, type='period')
data(ibex)
ibex.spec <- lsp(ibex[,2:3],type='period',from=12,to=36,ofac=10, plot=FALSE)
op <- par(pch=16)
plot.lsp(ibex.spec, main="Periodogram of daily rhythms of Tb in Capra ibex",
cex.lab=1.3,log="", type="b",level=FALSE,xaxt="n")
axis(side=1,at=seq(12,36,by=6))
par(op)
data(lynx)
set.seed(444)
rand.times <- sample(1:length(lynx),30) # select a random vector of sampling times
randlsp(1000,lynx[rand.times],times=rand.times)
## Repeats: 10 20 30 40 50 60 70 80 90 100 110 120 130 140 150 160 170 180 190 200 210 220 230 240 250 260 270 280 290 300 310 320 330 340 350 360 370 380 390 400 410 420 430 440 450 460 470 480 490 500 510 520 530 540 550 560 570 580 590 600 610 620 630 640 650 660 670 680 690 700 710 720 730 740 750 760 770 780 790 800 810 820 830 840 850 860 870 880 890 900 910 920 930 940 950 960 970 980 990 1000
randlsp(x = nb_d.tap)
## Repeats: 10 20 30 40 50 60 70 80 90 100 110 120 130 140 150 160 170 180 190 200 210 220 230 240 250 260 270 280 290 300 310 320 330 340 350 360 370 380 390 400 410 420 430 440 450 460 470 480 490 500 510 520 530 540 550 560 570 580 590 600 610 620 630 640 650 660 670 680 690 700 710 720 730 740 750 760 770 780 790 800 810 820 830 840 850 860 870 880 890 900 910 920 930 940 950 960 970 980 990 1000